Cannot BUG_ON() netbk_queue_full() when !netbk_can_queue(), as that condition can be triggered by a misbehaving client.
*/ RING_IDX rx_req_cons_peek; @@ -119,4 +123,10 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev); struct net_device_stats *netif_be_get_stats(struct net_device *dev); irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs); +static inline int netbk_can_queue(struct net_device *dev) +{ + netif_t *netif = netdev_priv(dev); + return netif->can_queue; +} + #endif /* __NETIF__BACKEND__COMMON_H__ */ diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c index b5decfd3ea..4253008e51 100644 --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c @@ -51,14 +51,12 @@ static int net_open(struct net_device *dev) netif_t *netif = netdev_priv(dev); if (netif_carrier_ok(dev)) __netif_up(netif); - netif_start_queue(dev); return 0; } static int net_close(struct net_device *dev) { netif_t *netif = netdev_priv(dev); - netif_stop_queue(dev); if (netif_carrier_ok(dev)) __netif_down(netif); return 0; @@ -107,8 +105,11 @@ netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]) SET_ETHTOOL_OPS(dev, &network_ethtool_ops); - /* Disable queuing. */ - dev->tx_queue_len = 0; + /* + * Reduce default TX queuelen so that each guest interface only + * allows it to eat around 6.4MB of host memory. 
+ */ + dev->tx_queue_len = 100; for (i = 0; i < ETH_ALEN; i++) if (be_mac[i] != 0) diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c index 80d16b7576..4fa33527ec 100644 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c @@ -136,6 +136,14 @@ static inline int is_xen_skb(struct sk_buff *skb) return (cp == skbuff_cachep); } +static inline int netbk_queue_full(netif_t *netif) +{ + RING_IDX peek = netif->rx_req_cons_peek; + + return ((netif->rx.sring->req_prod - peek) <= 0) || + ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) <= 0); +} + int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) { netif_t *netif = netdev_priv(dev); @@ -143,12 +151,16 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) BUG_ON(skb->dev != dev); /* Drop the packet if the target domain has no receive buffers. */ - if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)) || - (netif->rx_req_cons_peek == netif->rx.sring->req_prod) || - ((netif->rx_req_cons_peek - netif->rx.rsp_prod_pvt) == - NET_RX_RING_SIZE)) + if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) goto drop; + if (unlikely(netbk_queue_full(netif))) { + /* Not a BUG_ON() -- misbehaving netfront can trigger this. */ + if (netbk_can_queue(dev)) + DPRINTK("Queue full but not stopped!\n"); + goto drop; + } + /* * We do not copy the packet unless: * 1. 
The data is shared; or @@ -178,6 +190,13 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) netif->rx_req_cons_peek++; netif_get(netif); + if (netbk_can_queue(dev) && netbk_queue_full(netif)) { + netif->rx.sring->req_event = netif->rx_req_cons_peek + 1; + mb(); /* request notification /then/ check & stop the queue */ + if (netbk_queue_full(netif)) + netif_stop_queue(dev); + } + skb_queue_tail(&rx_queue, skb); tasklet_schedule(&net_rx_tasklet); @@ -351,6 +370,10 @@ static void net_rx_action(unsigned long unused) notify_list[notify_nr++] = irq; } + if (netif_queue_stopped(netif->dev) && + !netbk_queue_full(netif)) + netif_wake_queue(netif->dev); + netif_put(netif); dev_kfree_skb(skb); gop++; @@ -974,8 +997,13 @@ static void netif_page_release(struct page *page) irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) { netif_t *netif = dev_id; + add_to_net_schedule_list_tail(netif); maybe_schedule_tx_action(); + + if (netif_queue_stopped(netif->dev) && !netbk_queue_full(netif)) + netif_wake_queue(netif->dev); + return IRQ_HANDLED; } diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c index fd3d70459e..bcc9a794bd 100644 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c @@ -353,6 +353,7 @@ static int connect_rings(struct backend_info *be) unsigned long tx_ring_ref, rx_ring_ref; unsigned int evtchn; int err; + int val; DPRINTK(""); @@ -367,6 +368,15 @@ static int connect_rings(struct backend_info *be) return err; } + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d", + &val) < 0) + val = 0; + if (val) + be->netif->can_queue = 1; + else + /* Must be non-zero for pfifo_fast to work. */ + be->netif->dev->tx_queue_len = 1; + /* Map the shared frame, irq etc. */ err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn); if (err) { -- 2.30.2